tpe2d <- function(d,verbose=TRUE) { hc <- hclust(d,"single") n <- length(hc$height) d <- as.matrix(d) ind <- list() x <- list() for (i in 1:n) { if (verbose) { message("Iteration ",i," of ",n) } dmin <- hc$height[i] pair <- hc$merge[i,] if (max(pair)<0) { ind[[i]] <- -pair x[[i]] <- rbind(c(0,0),c(dmin,0)) } else if (min(pair)<0) { c1 <- -min(pair) c2 <- ind[[max(pair)]] ind[[i]] <- c(c1,c2) if (length(c2)==2) { x[[i]] <- cmdscale(d[ind[[i]],ind[[i]]]) } else { x[[i]] <- align2d(d[c1,c2,drop=FALSE],matrix(0,1,2),x[[max(pair)]],dmin) } } else { c1 <- ind[[pair[1]]] c2 <- ind[[pair[2]]] ind[[i]] <- c(c1,c2) x[[i]] <- align2d(d[c1,c2,drop=FALSE],x[[pair[1]]],x[[pair[2]]],dmin) } } x <- x[[n]][match(1:(n+1),ind[[n]]),] rownames(x) <- hc$labels x } align2d <- function(d,x1,x2,dmin) { n1 <- dim(x1)[1] n2 <- dim(x2)[1] x1 <- scale(x1,scale=FALSE) x2 <- scale(x2,scale=FALSE) obj <- function(par) { td <- as.matrix(dist(rigid2d(x1,x2,par)))[1:n1,(n1+1):(n1+n2)] sum((td-d)^2) } pen <- function(par) { tdmin <- min(as.matrix(dist(rigid2d(x1,x2,par)))[1:n1,(n1+1):(n1+n2)]) if (dmin>tdmin) { Inf } else { (tdmin-dmin)^2 } } par0 <- c(0,dmin+2*max(dist(x1),dist(x2)),0) par <- sumt(obj,pen,par0) rigid2d(x1,x2,par) } rot2d <- function(theta) { rbind(c(cos(theta),sin(theta)),c(-sin(theta),cos(theta))) } rigid2d <- function(x1,x2,par) { rbind(x1,sweep(x2%*%rot2d(par[1]),2,par[-1])) } sumt <- function(obj,pen,par0) { penobj <- function(lambda) { function(par) { obj(par)+lambda*pen(par) } } lambda <- 1 repeat { par <- optim(par0,penobj(lambda))$par if (max(abs(par-par0))

In [20]:
import fastcluster
import numpy as np
import numpy.random as npr

In [2]:
from sklearn.datasets import make_blobs

In [116]:
X,y = make_blobs(1000)

In [117]:
import pylab as pl
%matplotlib inline

In [118]:
pl.scatter(X[:,0],X[:,1],c=y,linewidths=0)


Out[118]:
<matplotlib.collections.PathCollection at 0x10fbb7f90>

In [119]:
%timeit linkage = fastcluster.linkage(X)


100 loops, best of 3: 4.75 ms per loop

In [120]:
linkage = fastcluster.linkage(X)

In [121]:
linkage.shape


Out[121]:
(999, 4)

In [122]:
linkage[0]


Out[122]:
array([  6.80000000e+01,   9.96000000e+02,   6.84473577e-04,
         2.00000000e+00])

In [43]:
fastcluster.


  File "<ipython-input-43-cedd8e8a580b>", line 1
    fastcluster.
                ^
SyntaxError: invalid syntax

In [123]:
pl.plot(linkage[:,2])


Out[123]:
[<matplotlib.lines.Line2D at 0x128e37250>]

In [124]:
pl.plot(linkage[-10:,2])


Out[124]:
[<matplotlib.lines.Line2D at 0x1289ca990>]

In [125]:
from scipy.cluster import hierarchy

In [126]:
hierarchy.dendrogram(linkage)


---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-126-4313c87158fb> in <module>()
----> 1 hierarchy.dendrogram(linkage)

/Users/joshuafass/Library/Enthought/Canopy_64bit/User/lib/python2.7/site-packages/scipy/cluster/hierarchy.pyc in dendrogram(Z, p, truncate_mode, color_threshold, get_leaves, orientation, labels, count_sort, distance_sort, show_leaf_counts, no_plot, no_labels, color_list, leaf_font_size, leaf_rotation, leaf_label_func, no_leaves, show_contracted, link_color_func, ax)
   2197                          leaf_rotation=leaf_rotation,
   2198                          contraction_marks=contraction_marks,
-> 2199                          ax=ax)
   2200 
   2201     return R

/Users/joshuafass/Library/Enthought/Canopy_64bit/User/lib/python2.7/site-packages/scipy/cluster/hierarchy.pyc in _plot_dendrogram(icoords, dcoords, ivl, p, n, mh, orientation, no_labels, color_list, leaf_font_size, leaf_rotation, contraction_marks, ax)
   1869 
   1870     if trigger_redraw:
-> 1871         matplotlib.pylab.draw_if_interactive()
   1872 
   1873 _link_line_colors = ['g', 'r', 'c', 'm', 'y', 'k']

/Applications/Canopy.app/appdata/canopy-1.4.1.1975.macosx-x86_64/Canopy.app/Contents/lib/python2.7/site-packages/IPython/utils/decorators.pyc in wrapper(*args, **kw)
     41     def wrapper(*args,**kw):
     42         wrapper.called = False
---> 43         out = func(*args,**kw)
     44         wrapper.called = True
     45         return out

/Users/joshuafass/Library/Enthought/Canopy_64bit/User/lib/python2.7/site-packages/matplotlib/backends/backend_macosx.pyc in draw_if_interactive()
    235         figManager =  Gcf.get_active()
    236         if figManager is not None:
--> 237             figManager.canvas.invalidate()
    238 
    239 

AttributeError: 'FigureCanvasAgg' object has no attribute 'invalidate'

In [127]:
pl.plot(linkage[:,3])


Out[127]:
[<matplotlib.lines.Line2D at 0x124f0b9d0>]

In [128]:
linkage[0][:2]


Out[128]:
array([  68.,  996.])

In [21]:
merge = np.array(linkage[:,:2],dtype=int)

In [23]:
height = linkage[:,2]

In [33]:
fastcluster.expand_dims(


---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-33-57588e3eadfb> in <module>()
----> 1 fastcluster.linkage_wrap(linkage)

TypeError: function takes exactly 4 arguments (1 given)

In [32]:
def tpe2d(d):
    """Partial Python port of the R routine ``tpe2d`` (work in progress).

    Builds a single-linkage hierarchy for ``d`` and walks the merges in
    order, recording for every merge step which observations belong to the
    newly formed cluster and, where already possible, a 2-D layout of them.

    Parameters
    ----------
    d : array-like
        Passed unchanged to ``fastcluster.linkage``; how it is interpreted
        (observation matrix vs. condensed distances) follows that function.

    Returns
    -------
    None
        The per-merge bookkeeping is built but not returned yet: the
        alignment step (``align2d`` in the R original) and the final
        extraction of the embedding have not been ported.
    """
    # Single linkage, matching hclust(d, "single") in the R original.
    hc = fastcluster.linkage(d, method='single')
    merge = np.array(hc[:, :2], dtype=int)
    height = hc[:, 2]
    n = len(height)  # number of merge steps = number of observations - 1
    n_obs = n + 1

    # The linkage rows use non-negative indices: values below n_obs are
    # original observations, and n_obs + j is the cluster created by merge
    # row j.  (R's hclust marks singletons with negative indices instead,
    # so a literal "< 0" test never fires here.)
    ind = [None] * n  # ind[i]: observation indices in the cluster made at step i
    x = [None] * n    # x[i]: 2-D coordinates for that cluster, when available

    for i in range(n):
        dmin = height[i]
        a, b = int(merge[i, 0]), int(merge[i, 1])
        lo, hi = min(a, b), max(a, b)

        if hi < n_obs:
            # Two single observations: place them dmin apart on the x-axis.
            # R: x[[i]] <- rbind(c(0,0),c(dmin,0))
            ind[i] = [a, b]
            x[i] = np.array([[0.0, 0.0], [dmin, 0.0]])
        elif lo < n_obs:
            # One observation joins an existing cluster; the lone point is
            # listed first, as in the R original.
            ind[i] = [lo] + ind[hi - n_obs]
            # TODO: coordinates need cmdscale/align2d, which are not ported.
        else:
            # Two existing clusters are merged.
            ind[i] = ind[a - n_obs] + ind[b - n_obs]
            # TODO: coordinates need align2d, which is not ported.

    return

In [25]:
def align2d(d,x1,x2,dmin):
    """Placeholder for the alignment step; not implemented yet.

    All arguments are accepted and ignored, and the function always
    returns None.
    """
    return None

In [26]:
def rot2d(theta):
    """Return the 2x2 rotation matrix used by the R reference ``rot2d``.

    Parameters
    ----------
    theta : float
        Rotation angle in radians.

    Returns
    -------
    numpy.ndarray, shape (2, 2)
        ``[[cos(theta), sin(theta)], [-sin(theta), cos(theta)]]``, i.e. the
        matrix that row vectors are right-multiplied by.
    """
    c, s = np.cos(theta), np.sin(theta)
    return np.array([[c, s], [-s, c]])

In [27]:
def rigid2d(x1,x2,par):
    """Stack ``x1`` on top of a rigidly moved copy of ``x2``.

    Port of the R reference
    ``rbind(x1, sweep(x2 %*% rot2d(par[1]), 2, par[-1]))``.

    Parameters
    ----------
    x1 : array-like, shape (n1, 2)
        Points that stay where they are.
    x2 : array-like, shape (n2, 2)
        Points that are rotated and then shifted.
    par : sequence of 3 floats
        ``par[0]`` is the rotation angle in radians; ``par[1:3]`` is the
        offset that is subtracted from every rotated row of ``x2``.

    Returns
    -------
    numpy.ndarray, shape (n1 + n2, 2)
        Rows of ``x1`` followed by the transformed rows of ``x2``.
    """
    theta = par[0]
    offset = np.asarray(par[1:3], dtype=float)
    # Rotation matrix written out here so this function does not depend on
    # rot2d; same layout as the R rot2d: [[cos, sin], [-sin, cos]].
    c, s = np.cos(theta), np.sin(theta)
    rotation = np.array([[c, s], [-s, c]])
    # R's sweep(..., 2, stats) defaults to subtraction, column by column.
    moved = np.dot(np.asarray(x2, dtype=float), rotation) - offset
    return np.vstack((np.asarray(x1, dtype=float), moved))

In [28]:
def sumt(obj,pen,par0):
    """Placeholder for the penalised-optimisation driver; not implemented yet.

    All arguments are accepted and ignored, and the function always
    returns None.
    """
    return None

In [29]:
np.min(merge)


Out[29]:
0

In [30]:
np.max(merge)


Out[30]:
1997

In [129]:
linkage = fastcluster.linkage(X)

In [139]:
linkage = hierarchy.linkage(X)

In [ ]:
hierarchy.linkage(

In [140]:
merge=np.array(linkage[:,:2],dtype=int)
height = linkage[:,2]

In [141]:
np.max(merge)


Out[141]:
1997

In [144]:
n = len(height)

In [145]:
S = [[i] for i in range(n)]

In [146]:
len(S)


Out[146]:
999

In [147]:
len(merge),len(height)


Out[147]:
(999, 999)

In [148]:
np.max(merge)


Out[148]:
1997

In [164]:
I = [np.array(range(n))]

In [178]:
from sklearn.manifold import MDS

In [184]:
mds = MDS(verbose=1,dissimilarity='precomputed')

In [181]:
from scipy.spatial import distance
pdist = distance.squareform(distance.pdist(X))

In [183]:
pdist.shape


Out[183]:
(1000, 1000)

In [194]:
X_ = mds.fit_transform(pdist[:500,:500])


breaking at iteration 156 with stress 8871.78062721
breaking at iteration 99 with stress 1624.83022769
breaking at iteration 132 with stress 9769.84298186
breaking at iteration 109 with stress 1626.71511577

In [195]:
pl.scatter(X_[:,0],X_[:,1],
           c=y[:500],alpha=0.5,linewidths=0)


Out[195]:
<matplotlib.collections.PathCollection at 0x1273a8dd0>

In [168]:
for i in range(n-1):
    a,b = merge[i]
    d = height[i]
    S.append(S[a] + S[b])
    I.append(np.hstack((I[i][(I[i] != a) * (I[i] != b)],1)))
    U =


---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-168-40a77954e396> in <module>()
      3     d = height[i]
      4     S.append(S[a] + S[b])
----> 5     I.append(np.hstack((I[i][I[i] != a and I[i] != b],n+i)))

ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()

In [152]:
S[-1]


Out[152]:
[828, 176, 475, 408, 20, 467, 253, 434, 13, 643]

In [153]:
(S[1]+S[2])+S[3]


Out[153]:
[1, 2, 3]

In [203]:
X = npr.randn(20000,10)

In [204]:
%timeit linkage = hierarchy.linkage(X)


1 loops, best of 3: 7.15 s per loop

In [205]:
%timeit linkage = fastcluster.linkage(X)


1 loops, best of 3: 5.27 s per loop

In [206]:
%timeit linkage = fastcluster.linkage_vector(X)


1 loops, best of 3: 2.82 s per loop

In [207]:
X,y = make_blobs(20000)

In [208]:
pl.scatter(X[:,0],X[:,1],c=y,linewidths=0)


Out[208]:
<matplotlib.collections.PathCollection at 0x1288646d0>

In [209]:
%timeit linkage = fastcluster.linkage_vector(X)


1 loops, best of 3: 1.36 s per loop

In [210]:
%timeit linkage = fastcluster.linkage(X)


1 loops, best of 3: 4.86 s per loop

In [211]:
%timeit linkage = hierarchy.linkage(X)


1 loops, best of 3: 6.11 s per loop

In [ ]: